import scrapy
from sougou.items import SougouItem
from fake_useragent import UserAgent


class SougouSpider(scrapy.Spider):
	"""Crawl Sogou pinyin dictionary categories and yield .scel download links.

	Starts at the dictionary index, follows every category link, then walks
	each category's paginated listing, emitting one SougouItem per dictionary
	with a display name (category + dict name + ".scel") and its download URL.
	"""

	name = 'sougou'
	# allowed_domains entries must be bare domains, not URLs: a scheme-prefixed
	# value never matches, so OffsiteMiddleware would warn and could filter
	# every request. (Was 'https://pinyin.sogou.com'.)
	allowed_domains = ['pinyin.sogou.com']
	start_urls = ['https://pinyin.sogou.com/dict/']
	# Randomized User-Agent picked once at class creation and reused on every
	# request to look less like a bot.
	ua = UserAgent()
	headers = {"User-Agent": ua.random}

	def parse(self, response):
		"""Parse the dictionary index: request each category listing page."""
		nodes = response.xpath("//div[@class='dict_category_list_title']")
		name_list = nodes.xpath(".//a/text()").extract()
		url_list = nodes.xpath(".//a/@href").extract()
		# Iterate the parallel name/href lists in lockstep.
		for category_name, href in zip(name_list, url_list):
			# Drop the query string; the path alone identifies the category.
			url = "https://pinyin.sogou.com" + href.split("?")[0]
			yield scrapy.Request(url, callback=self.parse_two,
								 headers=self.headers, dont_filter=True,
								 meta={"type1": category_name})

	def parse_two(self, response):
		"""Parse one category page: yield an item per dictionary, then paginate."""
		type1 = response.meta["type1"]
		# The first child div is the listing header, not a dictionary row.
		dict_list = response.xpath("//div[@id='dict_detail_list']/div")[1:]
		for dic in dict_list:
			dict_name = dic.xpath("./div[1]/div/a/text()").extract_first()
			dict_url = dic.xpath("./div[2]/div[2]/a/@href").extract_first()
			if not dict_name or not dict_url:
				# Skip malformed rows instead of raising IndexError.
				continue
			item = SougouItem()
			item['dictName'] = type1 + dict_name + ".scel"
			item['dictUrl'] = dict_url
			yield item
		# The last pagination anchor is the "next page" link. It is absent on
		# the final page, so guard against an empty extract() result instead
		# of indexing [-1] (which raised IndexError and aborted the callback).
		page_links = response.xpath(
			"//div[@id='dict_page_list']/ul/li/span/a/@href").extract()
		if page_links:
			next_url = "https://pinyin.sogou.com" + page_links[-1]
			yield scrapy.Request(next_url, callback=self.parse_two,
								 headers=self.headers, dont_filter=True,
								 meta={"type1": type1})


